#	Andy Harris
#	Date: 10 December 2013 (Tuesday)
#	Purpose: Make figure 2 in paper

#	libraries
library(doMC)
library(foreach)
library(ggplot2)
library(grid)
library(stringr)
#	Register the doMC parallel backend with 12 cores for the %dopar% loops
registerDoMC(cores = 12)
#	Functions
#	Helper scripts are sourced in this order; nameEstW() is called further
#	down and is presumably defined in one of them (confirm).
invisible(lapply(c("nameEst.R", "nameEstW.R"), source))
#xsumx <- function(x){x/sum(x)}
#	Start Code

# precinct by county data

# Voter-registration records. Indexed with vr[[ii]] below, one element per
# loop iteration (presumably one data frame per county -- confirm).
vr <- readRDS("allPrecinctsByCounty.Rdata")

# Surname frequency table; only the three groups analysed here are kept.
freqs <- readRDS(file = "freqsUS.Rdata")
freqs <- freqs[, c("hisp", "black", "white")]
# Drop rows whose total frequency is zero. A logical mask replaces the old
# `-which(...)`: when no row sums to zero, `-which(...)` is `integer(0)` and
# selects zero rows, silently emptying the table. `%in%` never returns NA, so
# rows with an NA sum are still kept, exactly as `which()` kept them.
freqs <- freqs[!(rowSums(freqs) %in% 0), , drop = FALSE]

# Rescale each group column to sum to one ...
freqs[,1] <- freqs[,1]/sum(freqs[,1])
freqs[,2] <- freqs[,2]/sum(freqs[,2])
freqs[,3] <- freqs[,3]/sum(freqs[,3])
# ... then rescale each row to sum to one. The rows of `conds` are used below
# as per-surname sampling probabilities over (hisp, black, white).
conds <- freqs/rowSums(freqs)

# Frequency-classifier precinct estimates.
# For every element of `vr`, each voter whose surname has a row in `conds`
# is assigned to one group by a single random draw weighted by that row; the
# precinct proportions are the shares of those draws.
# Result: one row per precinct with columns
#   county, precinct, rv (number of voter rows), hisp, black, white.
# NOTE(review): the draws are unseeded and run in parallel workers, so the
# output differs from run to run -- consider a reproducible RNG setup.
fc.prop <- foreach(ii = seq_along(vr), .combine = rbind) %dopar% {
	tmpvr <- vr[[ii]]
	# keep only voters with one of these three statuses
	tmpvr <- tmpvr[which(tmpvr$voter_status_desc %in% c("ACTIVE", "INACTIVE", "TEMPORARY")),]
	tmpvr$precinct_abbrv <- str_trim(tmpvr$precinct_abbrv)
	upct <- unique(tmpvr$precinct_abbrv)
	upct <- na.omit(upct[!(upct %in% "")])
	# No usable precinct label: contribute no rows. (`1:length(upct)` used to
	# iterate over c(1, 0) here and fail.)
	if (length(upct) == 0) {
		return(NULL)
	}
	pct.store <- data.frame(matrix(NA, nrow = length(upct), ncol = 6))
	names(pct.store) <- c("county", "precinct", "rv", "hisp", "black", "white")
	for (jj in seq_along(upct)) {
		tmppct <- tmpvr[which(tmpvr$precinct_abbrv == upct[jj]),]
		# Look rows up by surname. as.character() guards against a factor
		# column, which would index by integer code instead of by name.
		# Surnames without a row in `conds` come back as NA and are dropped.
		ethpr <- na.omit(conds[as.character(tmppct$last_name),])
		# `x`, `size` and `replace` are named, so each row of `ethpr` lands in
		# sample()'s `prob` argument: one weighted draw from 1:3 per voter.
		draws <- apply(ethpr, 1, sample, x = 1:3, size = 1, replace = TRUE)
		out <- prop.table(table(c("hisp", "black", "white")[draws]))
		# Fix the column order; a group that was never drawn yields NA here
		# (replaced by zero later in the script).
		out <- out[c("hisp", "black", "white")]
		ret <- c(nrow(tmppct), out)
		names(ret)[1] <- "rv"
		pct.store[jj,3:6] <- ret
	}
	pct.store$precinct <- upct
	pct.store$county <- unique(tmpvr$county_desc)
	return(pct.store)
}
fc.prop <- fc.prop[order(fc.prop$county, fc.prop$precinct),]
# untouched copy, kept before the merge steps below modify fc.prop
fc2 <- fc.prop

# Reload the surname frequency table for the second estimator; this time only
# the column normalisation is applied (no row normalisation).
freqs <- readRDS(file = "freqsUS.Rdata")
freqs <- freqs[, c("hisp", "black", "white")]
# Drop rows whose total frequency is zero. A logical mask replaces the old
# `-which(...)`: when no row sums to zero, `-which(...)` is `integer(0)` and
# selects zero rows, silently emptying the table. `%in%` never returns NA, so
# rows with an NA sum are still kept, exactly as `which()` kept them.
freqs <- freqs[!(rowSums(freqs) %in% 0), , drop = FALSE]

# Rescale each group column to sum to one.
freqs[,1] <- freqs[,1]/sum(freqs[,1])
freqs[,2] <- freqs[,2]/sum(freqs[,2])
freqs[,3] <- freqs[,3]/sum(freqs[,3])
# Matrix form, passed to nameEstW() as `cond` below.
conds <- as.matrix(freqs)
# Name-based precinct estimates via nameEstW().
# For every element of `vr`, the surname distribution of each precinct is
# passed to nameEstW() together with `conds`. Result: one row per precinct
# holding rv (number of voter rows), whatever nameEstW() returns (the rest of
# the script expects columns hisp, black, white), precinct and county.
nm.prop <- foreach(ii = seq_along(vr), .combine = rbind) %dopar% {
	tmpvr <- vr[[ii]]
	tmpvr <- tmpvr[which(tmpvr$voter_status_desc %in% c("ACTIVE", "INACTIVE", "TEMPORARY")),]#we remove "REMOVED" and "DENIED" voters.
	tmpvr$precinct_abbrv <- str_trim(tmpvr$precinct_abbrv)
	upct <- unique(tmpvr$precinct_abbrv)
	upct <- na.omit(upct[!(upct %in% "")])
	# No usable precinct label: contribute no rows. (`1:length(upct)` used to
	# iterate over c(1, 0) here.)
	if (length(upct) == 0) {
		return(NULL)
	}
	# One numeric vector per precinct, bound row-wise afterwards.
	# do.call(rbind, ...) always yields a matrix; the former
	# `foreach(..., .combine = rbind) %do%` returned a bare vector when a
	# county had a single precinct, so `colnames(out)[1] <-` failed.
	rows <- lapply(seq_along(upct), function(jj) {
		tmppct <- tmpvr[which(tmpvr$precinct_abbrv == upct[jj]),]
		targ <- tmppct$last_name
		# share of each surname among the precinct's voters
		yy <- c(prop.table(table(targ)))
		ests <- nameEstW(cond = conds, targ = yy)
		c(nrow(tmppct), ests)
	})
	out <- do.call(rbind, rows)
	colnames(out)[1] <- "rv"
	out <- as.data.frame(out)
	out$precinct <- upct
	out$county <- unique(tmpvr$county_desc)
	out <- out[order(out$precinct),]
	return(out)
}
nm.prop <- nm.prop[order(nm.prop$county, nm.prop$precinct),]
# untouched copy, kept before the merge steps below modify nm.prop
nm2 <- nm.prop

#generating and merging precinct level vote/abs outcomes for EI
precinct.to <- readRDS(file = "precinctTO.Rdata")
precinct.to <- precinct.to[order(precinct.to$county, precinct.to$precinct),]
# Strip leading zeros from precinct codes in all three tables so that the
# county-precinct keys built below are comparable.
precinct.to$precinct <- str_replace_all(string = precinct.to$precinct, pattern = "^0{1,}", replacement = "")
precinct.to$uid <- paste(precinct.to$county, precinct.to$precinct, sep = "-")

fc.prop$precinct <- str_replace_all(string = fc.prop$precinct, pattern = "^0{1,}", replacement = "")
nm.prop$precinct <- str_replace_all(string = nm.prop$precinct, pattern = "^0{1,}", replacement = "")

fc.prop$uid <- paste(fc.prop$county, fc.prop$precinct, sep = "-")
nm.prop$uid <- paste(nm.prop$county, nm.prop$precinct, sep = "-")

# Keep only precincts present on both sides. Logical `%in%` masks replace the
# old `x[-which(!(...)),]`: when every uid matched, `which()` returned
# `integer(0)` and the negative index dropped ALL rows instead of none.
precinct.to <- precinct.to[precinct.to$uid %in% nm.prop$uid,]
nm.prop <- nm.prop[nm.prop$uid %in% precinct.to$uid,]
fc.prop <- fc.prop[fc.prop$uid %in% precinct.to$uid,]

# Key every table by uid (this errors if a uid is duplicated) ...
row.names(precinct.to) <- precinct.to$uid
row.names(nm.prop) <- nm.prop$uid
row.names(fc.prop) <- fc.prop$uid

# ... and put both estimate tables in the row order of precinct.to.
nm.prop <- nm.prop[precinct.to$uid,]
fc.prop <- fc.prop[precinct.to$uid,]
# A group that was never drawn by the classifier shows up as NA in fc.prop;
# treat those shares as zero.
fc.prop[c("hisp", "black", "white")] <- lapply(
	fc.prop[c("hisp", "black", "white")],
	function(share) replace(share, is.na(share), 0)
)

# Turn shares into rounded head counts and recompute the precinct total from
# the rounded values.
nm.count <- round(nm.prop[["rv"]] * nm.prop[c("hisp", "black", "white")])
fc.count <- round(fc.prop[["rv"]] * fc.prop[c("hisp", "black", "white")])
nm.count[["rv"]] <- rowSums(nm.count)
fc.count[["rv"]] <- rowSums(fc.count)

# Abstentions = estimated registered voters minus votes, per estimator.
precinct.to[["abs.nm"]] <- nm.count[["rv"]] - precinct.to[["vote"]]
precinct.to[["abs.fc"]] <- fc.count[["rv"]] - precinct.to[["vote"]]

# Input tables for the ecological-inference step, one per estimator.
nm.ei <- data.frame(
	hisp = nm.count[["hisp"]],
	black = nm.count[["black"]],
	white = nm.count[["white"]],
	vote = precinct.to[["vote"]],
	abs = precinct.to[["abs.nm"]],
	county = precinct.to[["county"]],
	precinct = precinct.to[["precinct"]]
)
fc.ei <- data.frame(
	hisp = fc.count[["hisp"]],
	black = fc.count[["black"]],
	white = fc.count[["white"]],
	vote = precinct.to[["vote"]],
	abs = precinct.to[["abs.fc"]],
	county = precinct.to[["county"]],
	precinct = precinct.to[["precinct"]]
)


#Implement Greiner-Quinn Ecological Inference
#	libraries
library(RxCEcolInf)
registerDoMC(12)

#	Functions

#	Start Code


# as.character(): data.frame() converts strings to factors on R < 4.0, and a
# factor used later as a row index or row name would act through its integer
# codes rather than its labels.
ucounty <- as.character(unique(nm.ei$county))
stopifnot(length(ucounty) > 0)

# One task per county actually present. The loop used to be hard-coded to
# 1:100, which fails as soon as the merge above leaves any other number of
# counties. Each task tunes and runs the sampler on both input tables and
# returns the 2.5%, 50% and 97.5% quantiles of 'TURNOUT.black' for the
# frequency-classifier data followed by the name-based data.
store.list <- foreach(ii = seq_along(ucounty), .combine = rbind) %dopar% {
	print(ii)
	nm.dat <- nm.ei[which(nm.ei$county == ucounty[ii]),]
	fc.dat <- fc.ei[which(fc.ei$county == ucounty[ii]),]
	nm.tune <- Tune("vote, abs ~ hisp, black, white", data = nm.dat, num.iters = 15000, num.runs = 25)
	nm.ret <- Analyze("vote, abs ~ hisp, black, white", data = nm.dat, rho.vec = nm.tune$rhos, keepNNinternals = 0, burnin = 3e6, num.iters = 5.5e6, save.every = 500, debug = 0, print.every = 100000)
	fc.tune <- Tune("vote, abs ~ hisp, black, white", data = fc.dat, num.iters = 15000, num.runs = 25)
	fc.ret <- Analyze("vote, abs ~ hisp, black, white", data = fc.dat, rho.vec = fc.tune$rhos, keepNNinternals = 0, burnin = 3e6, num.iters = 5.5e6, save.every = 500, debug = 0, print.every = 100000)
	# mcmc() is not attached by this script directly; presumably it comes from
	# coda via RxCEcolInf's dependencies (confirm).
	tmpmc.fc <- mcmc(fc.ret)
	tmpmc.nm <- mcmc(nm.ret)
	ret <- c(quantile(tmpmc.fc[,'TURNOUT.black'], c(0.025, 0.5, 0.975)), quantile(tmpmc.nm[,'TURNOUT.black'], c(0.025, 0.5, 0.975)))
	return(ret)
}
# With a single county foreach returns the bare vector, not a 1-row matrix.
if (is.null(dim(store.list))) {
	store.list <- matrix(store.list, nrow = 1)
}
row.names(store.list) <- ucounty

black.to.ests <- as.data.frame(store.list)
# fc = frequency-classifier input, nm = name-based input; min/mid/max are the
# 2.5% / 50% / 97.5% quantiles in the order returned above.
names(black.to.ests) <- c('min.fc', 'mid.fc', 'max.fc', 'min.nm', 'mid.nm', 'max.nm')


# County-level reference figures, selected and ordered by county name.
# as.character(): if `ucounty` is a factor, `[ucounty, ]` would select rows by
# integer code instead of by row name.
county.eth.to <- readRDS(file = "countyEthnicTurnout.Rdata")
county.eth.to <- county.eth.to[as.character(ucounty),]

pdata <- cbind(black.to.ests, black.to = county.eth.to[,'black.to'], black.rv = county.eth.to[,'black.count'])
pdata$county <- row.names(pdata)


# Order counties by black.rv, largest first, and split them into two halves
# (grd 0 = first half, grd 1 = second half) for the two panels. The split is
# derived from nrow(pdata); it used to be hard-coded as 50 + 50 rows, which
# errors for any other number of counties.
pdata$county <- factor(pdata$county, levels = pdata$county[order(pdata$black.rv)])
pdata <- pdata[rev(order(pdata$county)),]
pdata$grd <- rep(c(0, 1), times = c(ceiling(nrow(pdata) / 2), floor(nrow(pdata) / 2)))

# Per county: thick grey range = fc interval, thin black range = nm interval,
# point = black.to from county.eth.to. Both panels share the same layers and
# differ only in their data subset and x-axis label.
p0a <- ggplot(data = pdata[pdata$grd == 1,])
p1a <- p0a +
	geom_linerange(aes(x = county, ymin = min.fc, ymax = max.fc), col = "grey", size = 2) +
	geom_linerange(aes(x = county, ymin = min.nm, ymax = max.nm), col = "black") +
	geom_point(aes(y = black.to, x = county)) +
	coord_flip() +
	theme_bw() +
	theme(axis.text.y = element_text(size = 7)) +
	labs(x = "", y = "Black Turnout") +
	ylim(0,1)
p0b <- ggplot(data = pdata[pdata$grd == 0,])
p1b <- p0b +
	geom_linerange(aes(x = county, ymin = min.fc, ymax = max.fc), col = "grey", size = 2) +
	geom_linerange(aes(x = county, ymin = min.nm, ymax = max.nm), col = "black") +
	geom_point(aes(y = black.to, x = county)) +
	coord_flip() +
	theme_bw() +
	theme(axis.text.y = element_text(size = 7)) +
	labs(x = "County", y = "Black Turnout") +
	ylim(0,1)

# Viewport addressing row x, column y of the layout pushed below.
vplayout <- function(x, y) {
	viewport(layout.pos.row = x, layout.pos.col = y)
}

# Draw both panels side by side on a single 8 x 6 PDF page:
# p1b in the left cell, p1a in the right cell.
pdf(file = "Blackmultiplot.pdf", width = 8, height = 6)
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 2)))
print(p1b, vp = vplayout(1, 1))
print(p1a, vp = vplayout(1, 2))
dev.off()

# Save every object in the workspace.
save.image(file = "eiPlotOutputFinal.Rdata")